"""
CSS selectors
XPath selectors
"""
#
# hxml_str = """
# <!DOCTYPE html>
# <html lang="en">
# <head>
#     <meta charset="UTF-8">
#     <title>Title</title>
# </head>
# <body>
# <ul>
#     <li id="li1">li1</li>
#     <li id="li2">li2</li>
#     <li id="li3">li3</li>
# </ul>
#
# <ol>
#     <li id="li1"><a href="https://www.baidu.com">百度</a></li>
#     <li id="li2"><a href="https://www.jd.com">京东</a></li>
#     <li id="li3"><a href="https://www.taobao.com">淘宝</a></li>
# </ol>
#
# <h1><a href="https://www.pdd.com">拼多多</a></h1>
# </body>
# </html>
# """

# from lxml import etree
#
# tree = etree.HTML(hxml_str)
# print(type(tree))
# print(dir(tree))

# titles = tree.cssselect('title')
# for title in titles:
#     print(type(title), title.text, title.attrib, title.tag)
# print("============")
# lis = tree.cssselect('li')
# for li in lis:
#     print(li.text, li.attrib, li.tag)
# print("============")
# lis = tree.cssselect('ol li')
# for li in lis:
#     print(li.text, li.attrib, li.tag)
#     a = li.cssselect('a')[0]
#     print(a.text, a.attrib, a.tag)



# html = tree.xpath('/html')
# print(html, type(html))
#
# title = tree.xpath('//title')
# print(title, type(title))
#
#
#
# ul = tree.xpath('//ul')[0]
# lis = ul.xpath('./li')
# print(lis)
# ol2 = ul.xpath('../ol')
# print(ol2)
#
# lis = tree.xpath('//ol/li')
# print(lis)
# print("============")
#
#
# a_s = ol2[0].xpath('.//a')
# print(a_s)
#
# for a in a_s:
#     print(a.text, a.attrib.get('href'))


# a_s = tree.xpath('//a')
# for a in a_s:
#     print(a.xpath('./text()'), a.xpath('./@href'))

# print(tree.xpath('//h1/a/text()'), tree.xpath('//h1/a/@href'))




# Sample HTML document parsed by the XPath predicate examples further down.
# It contains:
#   * one <ul> with three plain-text <li> items (ids li1..li3),
#   * two <ol> lists (id="ol1" and id="ol2"), each with three <li> items
#     that wrap an <a> link,
#   * two <h1> headings that wrap a link; only the second <h1> carries a
#     class attribute (class="pdd").
html_str = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
<ul>
    <li id="li1">li1</li>
    <li id="li2">li2</li>
    <li id="li3">li3</li>
</ul>

<ol id="ol1">
    <li id="li11"><a href="https://www.baidu.com">百度</a></li>
    <li id="li22"><a href="https://www.jd.com">京东</a></li>
    <li id="li33"><a href="https://www.taobao.com">淘宝</a></li>
</ol>

<ol id="ol2">
    <li id="li111"><a href="https://www.baidu.com">百度</a></li>
    <li id="li222"><a href="https://www.jd.com">京东</a></li>
    <li id="li333"><a href="https://www.taobao.com">淘宝</a></li>
</ol>

<h1><a href="https://www.pdd.com">拼多多</a></h1>
<h1 class="pdd"><a href="https://www.pdd2.com">拼多多2</a></h1>

</body>
</html>
"""

from lxml import etree
# Parse the sample document into an element tree; every query below runs
# against this root element.
tree = etree.HTML(html_str)

# Example: select every <ol> element anywhere in the document.
# ols = tree.xpath('//ol')
# print(ols)

# Example: attribute-equality predicate. The first query restricts the match
# to <ol> elements; the second uses * to match any element with that id.
# ol2 = tree.xpath('//ol[@id="ol2"]')
# print(ol2, ol2[0].attrib)
#
# ol1 = tree.xpath('//*[@id="ol1"]')
# print(ol1, ol1[0].attrib)


# Example: positional predicates combined with the union operator (|).
# Positions are counted among the <li> children of each parent, so this picks
# the first, last and second-to-last <li> of every list.
# lis = tree.xpath('//li[1]|//li[last()]|//li[last()-1]')
# for li in lis:
#     print(li.attrib)


# Example: every <li> except the first one within its parent.
# lis = tree.xpath('//li[position()>1]')
# for li in lis:
#     print(li.attrib)


# Example: attribute-existence predicate. Only <h1> elements that have a
# class attribute are matched, then the text of their <a> child is returned.
# a_s_text = tree.xpath("//h1[@class]/a/text()")
# print(a_s_text)

# Example: boolean `or` inside a predicate; matches every <li> whose position
# within its parent is anything other than 2.
# lis = tree.xpath('//li[position()<2 or position()>2]')
# print(lis)



# Example: substring match with contains(), on the link text or on the href.
# a = tree.xpath('//a[contains(text(), "京")]')
# a = tree.xpath('//a[contains(@href, "jd")]')

# Example: prefix match with starts-with() on the link text.
# a = tree.xpath('//a[starts-with(text(), "京")]')
# Active example: prefix match on the href attribute. Every href in html_str
# begins with "https://", so this matches all <a> elements in the document.
a = tree.xpath('//a[starts-with(@href, "http")]')
# xpath() returns a list of element objects here, so this prints the list of
# elements rather than their text or href values.
print(a)